# Packages this script depends on.
required_packages <- c("readxl")

# Attach every dependency; a package that is not available yet is installed
# first.
for (package in required_packages) {
  is_installed <- requireNamespace(package, quietly = TRUE)
  if (!is_installed) {
    install.packages(package)
  }
  library(package, character.only = TRUE)
}

# Import the cohort sheet unless a data frame called `df` is already loaded.
# A plain exists("df") is always TRUE because the attached stats package
# exports a function named df(); restricting the lookup to mode "list" (the
# mode of data frames and tibbles) ignores that function, so the workbook is
# actually read in a fresh session.
if (!exists("df", mode = "list")) {
  file_path <- "~/cohort.xlsx"
  # Cells holding the literal text "#N/A" are read as NA.
  df <- read_excel(file_path, sheet = "cohort", na = "#N/A")
}

# Restrict the cohort to the SUV measurements and covariates analysed below.
# Each region contributes four statistics: mean, min, max and sd.
suv_regions <- c("r_upper", "r_lower", "l_upper", "l_lower",
                 "liver1", "liver2", "aorta")
suv_columns <- paste0(rep(suv_regions, each = 4), "_suv_",
                      c("mean", "min", "max", "sd"))
covariate_columns <- c("side_r0_l1", "copd", "pet_device", "pet_distance_ic")
df_filtered <- df[, c(suv_columns, covariate_columns)]

# Coerce every retained column to numeric; assigning into [] keeps the data
# frame structure and the column names.
df_filtered[] <- lapply(df_filtered, as.numeric)

# Keep the rows whose pet_distance_ic lies within [-1, 365].
# NOTE(review): rows with a missing pet_distance_ic are excluded by this
# filter -- confirm that dropping them is intended.
df_filtered <- subset(
  df_filtered,
  !is.na(pet_distance_ic) & pet_distance_ic >= -1 & pet_distance_ic <= 365
)

# Create the derived columns up front, filled with NA.
# The ratio columns are the combination of eight prefixes with four
# statistics (mean, max, min, sd), generated prefix by prefix.
ratio_prefixes <- c("liver_ratio", "liver_ratio_lower", "liver_ratio_upper",
                    "liver_no_tumor_ratio",
                    "aorta_ratio", "aorta_ratio_lower", "aorta_ratio_upper",
                    "aorta_no_tumor_ratio")
ratio_stats <- c("mean", "max", "min", "sd")
cols <- c(
  paste("no_tumor_suv", c("mean", "min", "max", "sd"), sep = "_"),
  "suv_95", "no_tumor_suv_95", "upper_suv_95", "lower_suv_95",
  paste(rep(ratio_prefixes, each = length(ratio_stats)), ratio_stats,
        sep = "_")
)
df_filtered[, cols] <- NA

#calculate SUV
# Statistics stored for every region of interest.
suv_stat_names <- c("mean", "min", "max", "sd")

# Copy all four statistics of region `donor` into region `target` for the rows
# where the target mean is missing and the donor mean is present.
# Returns the updated data frame.
fill_from_counterpart <- function(data, target, donor) {
  needs_fill <- is.na(data[[paste0(target, "_suv_mean")]]) &
    !is.na(data[[paste0(donor, "_suv_mean")]])
  for (stat_name in suv_stat_names) {
    target_col <- paste0(target, "_suv_", stat_name)
    donor_col <- paste0(donor, "_suv_", stat_name)
    data[[target_col]][needs_fill] <- data[[donor_col]][needs_fill]
  }
  data
}

# Mirror each measurement onto its missing counterpart (left/right lung per
# level, liver1/liver2). Each entry is c(target, donor).
counterpart_pairs <- list(
  c("l_upper", "r_upper"), c("r_upper", "l_upper"),
  c("l_lower", "r_lower"), c("r_lower", "l_lower"),
  c("liver1", "liver2"), c("liver2", "liver1")
)
for (pair in counterpart_pairs) {
  df_filtered <- fill_from_counterpart(df_filtered, pair[1], pair[2])
}

# no_tumor_suv_*: average of the lower and upper value of one lung. The left
# lung is used when side_r0_l1 == 1 and the right lung when side_r0_l1 == 0,
# in both cases only if that lung's lower mean is available.
# NOTE(review): presumably side_r0_l1 selects the lung treated as tumor-free;
# confirm the coding against the data dictionary.
side_code <- df_filtered$side_r0_l1
use_left <- !is.na(side_code) & side_code == 1 &
  !is.na(df_filtered$l_lower_suv_mean)
use_right <- !is.na(side_code) & side_code == 0 &
  !is.na(df_filtered$r_lower_suv_mean)

for (stat_name in suv_stat_names) {
  # The two values are averaged with rowMeans(). Calling mean(a, b) would not
  # average them: mean() treats its second positional argument as `trim` and
  # returns `a` unchanged.
  left_avg <- rowMeans(
    cbind(df_filtered[[paste0("l_lower_suv_", stat_name)]],
          df_filtered[[paste0("l_upper_suv_", stat_name)]]),
    na.rm = TRUE
  )
  right_avg <- rowMeans(
    cbind(df_filtered[[paste0("r_lower_suv_", stat_name)]],
          df_filtered[[paste0("r_upper_suv_", stat_name)]]),
    na.rm = TRUE
  )
  no_tumor_values <- rep(NA_real_, nrow(df_filtered))
  no_tumor_values[use_left] <- left_avg[use_left]
  no_tumor_values[use_right] <- right_avg[use_right]
  df_filtered[[paste0("no_tumor_suv_", stat_name)]] <- no_tumor_values
}

# Average pairs of regions, statistic by statistic, ignoring missing values:
#   upper_suv_* = right/left upper lung,  lower_suv_* = right/left lower lung,
#   suv_lung_*  = upper/lower aggregate,  liver_suv_* = liver1/liver2.
# The order matters: suv_lung_* is built from the upper_suv_* and lower_suv_*
# columns created just before it.
region_pairs <- list(
  list(target = "upper_suv", sources = c("r_upper_suv", "l_upper_suv")),
  list(target = "lower_suv", sources = c("r_lower_suv", "l_lower_suv")),
  list(target = "suv_lung", sources = c("upper_suv", "lower_suv")),
  list(target = "liver_suv", sources = c("liver1_suv", "liver2_suv"))
)
for (region_pair in region_pairs) {
  for (summary_stat in c("mean", "max", "min", "sd")) {
    source_columns <- paste(region_pair$sources, summary_stat, sep = "_")
    target_column <- paste(region_pair$target, summary_stat, sep = "_")
    df_filtered[[target_column]] <- rowMeans(df_filtered[, source_columns],
                                             na.rm = TRUE)
  }
}

# Derive the mean + qnorm(0.95) * sd estimates and the lung-to-liver and
# lung-to-aorta ratios for all rows at once. Working on whole columns avoids
# one data frame copy per row and also copes with an empty data frame, which
# a 1:nrow() loop does not (it would iterate over c(1, 0)).
z_95 <- qnorm(0.95)

# Rows for which a no-tumor SUV mean is available.
has_no_tumor <- !is.na(df_filtered$no_tumor_suv_mean)

# Blank out the values of rows without a no-tumor SUV mean.
only_with_no_tumor <- function(values) {
  values[!has_no_tumor] <- NA
  values
}

df_filtered$suv_95 <- df_filtered$suv_lung_mean +
  z_95 * df_filtered$suv_lung_sd
df_filtered$no_tumor_suv_95 <- only_with_no_tumor(
  df_filtered$no_tumor_suv_mean + z_95 * df_filtered$no_tumor_suv_sd
)
# NOTE(review): upper_suv_95 and lower_suv_95 are only filled for rows that
# have a no-tumor SUV mean, although they do not use it. This restriction is
# kept deliberately so results do not change -- confirm whether it is intended.
df_filtered$upper_suv_95 <- only_with_no_tumor(
  df_filtered$upper_suv_mean + z_95 * df_filtered$upper_suv_sd
)
df_filtered$lower_suv_95 <- only_with_no_tumor(
  df_filtered$lower_suv_mean + z_95 * df_filtered$lower_suv_sd
)

# Ratios: each lung statistic divided by the same statistic of the reference
# organ (liver or aorta).
for (reference_organ in c("liver", "aorta")) {
  for (ratio_stat in c("mean", "max", "min", "sd")) {
    reference <- df_filtered[[paste0(reference_organ, "_suv_", ratio_stat)]]
    df_filtered[[paste0(reference_organ, "_ratio_", ratio_stat)]] <-
      df_filtered[[paste0("suv_lung_", ratio_stat)]] / reference
    df_filtered[[paste0(reference_organ, "_ratio_upper_", ratio_stat)]] <-
      df_filtered[[paste0("upper_suv_", ratio_stat)]] / reference
    df_filtered[[paste0(reference_organ, "_ratio_lower_", ratio_stat)]] <-
      df_filtered[[paste0("lower_suv_", ratio_stat)]] / reference
    df_filtered[[paste0(reference_organ, "_no_tumor_ratio_", ratio_stat)]] <-
      only_with_no_tumor(
        df_filtered[[paste0("no_tumor_suv_", ratio_stat)]] / reference
      )
  }
}

# Outcome variables compared between PET devices, in groups of four
# (whole lung, upper lung, lower lung, no-tumor lung).
variables <- c(
  "suv_lung_max", "upper_suv_max", "lower_suv_max", "no_tumor_suv_max",
  "suv_lung_mean", "upper_suv_mean", "lower_suv_mean", "no_tumor_suv_mean",
  "suv_95", "upper_suv_95", "lower_suv_95", "no_tumor_suv_95",
  "liver_ratio_mean", "liver_ratio_upper_mean", "liver_ratio_lower_mean",
  "liver_no_tumor_ratio_mean",
  "aorta_ratio_mean", "aorta_ratio_upper_mean", "aorta_ratio_lower_mean",
  "aorta_no_tumor_ratio_mean"
)

library(PMCMRplus)

# One Kruskal-Wallis test per outcome with pet_device as the grouping
# variable; p-values are rounded to four decimals.
kruskal_p_values <- vapply(
  variables,
  function(outcome) {
    test_formula <- as.formula(paste(outcome, "~ pet_device"))
    round(kruskal.test(test_formula, data = df_filtered)$p.value, 4)
  },
  numeric(1),
  USE.NAMES = FALSE
)
results <- data.frame(Variable = variables, p_value = kruskal_p_values)

# Insert a header row into the global `results` table.
#
# variables: text placed in the Variable column of the new row.
# row:       number of existing rows that stay above the new row
#            (0 = insert at the top, nrow(results) = append at the end).
#
# The new row has empty strings for p_value and Significance. `results` is
# replaced in the enclosing (global) environment, so callers do not need to
# assign the return value.
insert_row <- function(variables, row) {
  n_rows <- nrow(results)
  stopifnot(is.numeric(row), length(row) == 1, row >= 0, row <= n_rows)
  new_row <- data.frame(Variable = variables, p_value = "", Significance = "")
  rows_before <- results[seq_len(row), ]
  # seq_len() yields an empty index when row == n_rows; (row + 1):n_rows would
  # count backwards there and append an NA row plus a duplicated last row.
  rows_after <- results[seq_len(n_rows - row) + row, ]
  results <<- rbind(rows_before, new_row, rows_after)
}

#add significance to df
# Star rating per p-value: "***" below 0.001, "**" below 0.01, "*" below 0.05,
# otherwise "". Assigning from the loosest to the strictest threshold lets the
# strictest matching label win. Missing p-values (kruskal.test yields NaN when
# all observations are tied) get "" instead of aborting the script, and an
# empty results table is handled as well.
p_values <- results$p_value
p_is_known <- !is.na(p_values)
significance_labels <- rep("", length(p_values))
significance_labels[p_is_known & p_values < 0.05] <- "*"
significance_labels[p_is_known & p_values < 0.01] <- "**"
significance_labels[p_is_known & p_values < 0.001] <- "***"
results$Significance <- significance_labels

# Bonferroni correction: divide the 0.05 level by the number of tested
# variables and report the adjusted threshold.
variable_count <- length(variables)
adjusted_alpha <- 0.05 / variable_count
bonferroni_message <- paste(
  variable_count, " variables. Bonferroni adjusted alpha:", adjusted_alpha
)
print(bonferroni_message)

# Put a header row in front of every group of four variables. Each position
# is the number of rows above the header at the moment it is inserted, so it
# already accounts for the headers added before it.
section_titles <- c("SUVMAX", "SUVMEAN", "SUV95",
                    "SUVMEAN lung/liver", "SUVMEAN lung/blood pool")
rows_above <- c(0, 5, 10, 15, 20)
for (section in seq_along(section_titles)) {
  insert_row(section_titles[section], rows_above[section])
}
# Renumber the rows consecutively after the insertions.
rownames(results) <- seq_len(nrow(results))

# Replace the internal variable names by the labels shown in the table.
# Names are the raw variable names, values the display labels; any cell of
# `results` equal to a raw name is replaced.
display_names <- c(
  suv_lung_max = "whole lung",
  upper_suv_max = "upper lung",
  lower_suv_max = "lower lung",
  no_tumor_suv_max = "TFL",
  suv_lung_mean = "whole lung",
  upper_suv_mean = "upper lung",
  lower_suv_mean = "lower lung",
  no_tumor_suv_mean = "TFL",
  suv_95 = "whole lung",
  upper_suv_95 = "upper lung",
  lower_suv_95 = "lower lung",
  no_tumor_suv_95 = "TFL",
  liver_ratio_mean = "whole lung",
  liver_ratio_upper_mean = "upper lung",
  liver_ratio_lower_mean = "lower lung",
  liver_no_tumor_ratio_mean = "TFL",
  aorta_ratio_mean = "whole lung",
  aorta_ratio_upper_mean = "upper lung",
  aorta_ratio_lower_mean = "lower lung",
  aorta_no_tumor_ratio_mean = "TFL"
)
for (raw_name in names(display_names)) {
  results[results == raw_name] <- display_names[[raw_name]]
}

# Show the finished table.
print(results)

# Save the table as CSV in the working directory and confirm on the console.
output_file <- "Table A1.csv"
write.csv(results, file = output_file, row.names = FALSE)
print("saved:Table A1.csv")

# Text fragments for the manuscript: smallest and largest p-value per section.
# The row ranges are positions in the final table (header rows included):
# 2:5 SUVmax, 7:10 SUVmean, 12:15 SUV95, 22:25 lung/blood pool ratio.
p_numeric <- as.numeric(results$p_value)
lowest_p <- function(rows, digits = 3) round(min(p_numeric[rows]), digits)
highest_p <- function(rows, digits = 3) round(max(p_numeric[rows]), digits)

# NOTE(review): the SUV95 minimum is rounded to 4 digits while every other
# value uses 3 -- confirm whether that is intentional.
print(paste("(mean SUVmean: p =", lowest_p(7:10), "-", highest_p(7:10),
            " and SUV95: p =", lowest_p(12:15, 4), "-", highest_p(12:15),
            ")"))
print(paste("...mean SUVmax (p =", lowest_p(2:5), "-", highest_p(2:5),
            ") and lung/blood ratio (p =", lowest_p(22:25), "-",
            highest_p(22:25), ")"))